Import the data
# Load the two CSV inputs with data.table::fread(); the first row of each file
# holds the column names and character columns are kept as plain strings.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
hap_data <- fread(
  "data/WorldHappiness.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
country_data <- fread(
  "data/world-data-2023.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
EDA
## Country happiness_score gdp_per_capita family health freedom
## 1: Norway 7.5370 1.6164632 1.533524 0.7966665 0.6354226
## 2: Denmark 7.5220 1.4823830 1.551122 0.7925655 0.6260067
## 3: Iceland 7.5040 1.4806330 1.610574 0.8335521 0.6271626
## 4: Switzerland 7.4940 1.5649796 1.516912 0.8581313 0.6200706
## 5: Finland 7.4690 1.4435719 1.540247 0.8091577 0.6179509
## ---
## 788: Botswana 3.4789 0.9975490 0.000000 0.4941017 0.5090894
## 789: Tanzania 3.4762 0.4571631 0.000000 0.4426779 0.5093431
## 790: Rwanda 3.3123 0.3432427 0.000000 0.5723833 0.6040879
## 791: Zimbabwe 3.2992 0.4255640 0.000000 0.3750376 0.3774047
## 792: Afghanistan 2.5669 0.3007058 0.000000 0.2660515 0.0000000
## generosity government_trust dystopia_residual continent Year
## 1: 0.36201224 0.315963835 2.2770267 Europe 2015
## 2: 0.35528049 0.400770068 2.3137074 Europe 2015
## 3: 0.47554022 0.153526559 2.3227153 Europe 2015
## 4: 0.29054928 0.367007285 2.2767162 Europe 2015
## 5: 0.24548277 0.382611543 2.4301815 Europe 2015
## ---
## 788: 0.03340749 0.101786368 0.2572405 Africa 2020
## 789: 0.27154091 0.203880861 0.7189634 Africa 2020
## 790: 0.23570499 0.485542476 0.5484450 Africa 2020
## 791: 0.15134919 0.080928579 0.8410311 Africa 2020
## 792: 0.13523471 0.001225785 1.5072356 Asia 2020
## social_support cpi_score
## 1: 0.0000000 88
## 2: 0.0000000 91
## 3: 0.0000000 79
## 4: 0.0000000 86
## 5: 0.0000000 90
## ---
## 788: 1.0856948 60
## 789: 0.8726746 38
## 790: 0.5228763 54
## 791: 1.0478352 24
## 792: 0.3564338 19
## [1] "Norway" "Denmark" "Iceland"
## [4] "Switzerland" "Finland" "Netherlands"
## [7] "Canada" "New Zealand" "Sweden"
## [10] "Australia" "Israel" "Costa Rica"
## [13] "Austria" "United States" "Ireland"
## [16] "Germany" "Belgium" "Luxembourg"
## [19] "United Kingdom" "Chile" "United Arab Emirates"
## [22] "Brazil" "Argentina" "Mexico"
## [25] "Singapore" "Malta" "Guatemala"
## [28] "Uruguay" "Panama" "France"
## [31] "Thailand" "Spain" "Colombia"
## [34] "Saudi Arabia" "Kuwait" "Slovakia"
## [37] "Bahrain" "Malaysia" "Nicaragua"
## [40] "Ecuador" "El Salvador" "Poland"
## [43] "Uzbekistan" "Italy" "Russia"
## [46] "Japan" "Lithuania" "Algeria"
## [49] "Latvia" "Moldova" "Romania"
## [52] "Bolivia" "Turkmenistan" "Kazakhstan"
## [55] "Slovenia" "Peru" "Mauritius"
## [58] "Cyprus" "Estonia" "Belarus"
## [61] "Libya" "Turkey" "Paraguay"
## [64] "Philippines" "Serbia" "Jordan"
## [67] "Hungary" "Jamaica" "Croatia"
## [70] "Kosovo" "China" "Pakistan"
## [73] "Indonesia" "Venezuela" "Montenegro"
## [76] "Morocco" "Azerbaijan" "Dominican Republic"
## [79] "Greece" "Lebanon" "Portugal"
## [82] "Bosnia and Herzegovina" "Honduras" "Nigeria"
## [85] "Vietnam" "Tajikistan" "Kyrgyzstan"
## [88] "Nepal" "Mongolia" "South Africa"
## [91] "Tunisia" "Egypt" "Bulgaria"
## [94] "Sierra Leone" "Cameroon" "Iran"
## [97] "Albania" "Bangladesh" "Kenya"
## [100] "Myanmar" "Senegal" "Zambia"
## [103] "Iraq" "Gabon" "Ethiopia"
## [106] "Sri Lanka" "Armenia" "India"
## [109] "Mauritania" "Georgia" "Mali"
## [112] "Cambodia" "Ghana" "Ukraine"
## [115] "Uganda" "Burkina Faso" "Niger"
## [118] "Malawi" "Chad" "Zimbabwe"
## [121] "Afghanistan" "Botswana" "Benin"
## [124] "Madagascar" "Haiti" "Yemen"
## [127] "Liberia" "Guinea" "Togo"
## [130] "Rwanda" "Tanzania" "Burundi"
## [133] "Switzerland" "Iceland" "Denmark"
## [136] "Norway" "Canada" "Finland"
## [139] "Netherlands" "Sweden" "New Zealand"
## [142] "Australia" "Israel" "Costa Rica"
## [145] "Austria" "Mexico" "United States"
## [148] "Brazil" "Luxembourg" "Ireland"
## [151] "Belgium" "United Arab Emirates" "United Kingdom"
## [154] "Venezuela" "Singapore" "Panama"
## [157] "Germany" "Chile" "France"
## [160] "Argentina" "Uruguay" "Colombia"
## [163] "Thailand" "Saudi Arabia" "Spain"
## [166] "Malta" "Kuwait" "El Salvador"
## [169] "Guatemala" "Uzbekistan" "Slovakia"
## [172] "Japan" "Ecuador" "Bahrain"
## [175] "Italy" "Bolivia" "Moldova"
## [178] "Paraguay" "Kazakhstan" "Slovenia"
## [181] "Lithuania" "Nicaragua" "Peru"
## [184] "Belarus" "Poland" "Malaysia"
## [187] "Croatia" "Libya" "Russia"
## [190] "Jamaica" "Cyprus" "Algeria"
## [193] "Kosovo" "Turkmenistan" "Mauritius"
## [196] "Estonia" "Indonesia" "Vietnam"
## [199] "Turkey" "Kyrgyzstan" "Nigeria"
## [202] "Azerbaijan" "Pakistan" "Jordan"
## [205] "Montenegro" "China" "Zambia"
## [208] "Romania" "Serbia" "Portugal"
## [211] "Latvia" "Philippines" "Morocco"
## [214] "Albania" "Bosnia and Herzegovina" "Dominican Republic"
## [217] "Mongolia" "Greece" "Lebanon"
## [220] "Hungary" "Honduras" "Tajikistan"
## [223] "Tunisia" "Bangladesh" "Iran"
## [226] "Ukraine" "Iraq" "South Africa"
## [229] "Ghana" "Zimbabwe" "Liberia"
## [232] "India" "Haiti" "Nepal"
## [235] "Ethiopia" "Sierra Leone" "Mauritania"
## [238] "Kenya" "Armenia" "Botswana"
## [241] "Myanmar" "Georgia" "Malawi"
## [244] "Sri Lanka" "Cameroon" "Bulgaria"
## [247] "Egypt" "Yemen" "Mali"
## [250] "Uganda" "Senegal" "Gabon"
## [253] "Niger" "Cambodia" "Tanzania"
## [256] "Madagascar" "Chad" "Guinea"
## [259] "Burkina Faso" "Afghanistan" "Rwanda"
## [262] "Benin" "Burundi" "Togo"
## [265] "Finland" "Denmark" "Norway"
## [268] "Iceland" "Netherlands" "Switzerland"
## [271] "Sweden" "New Zealand" "Canada"
## [274] "Austria" "Australia" "Costa Rica"
## [277] "Israel" "Luxembourg" "United Kingdom"
## [280] "Ireland" "Germany" "Belgium"
## [283] "United States" "United Arab Emirates" "Malta"
## [286] "Mexico" "France" "Chile"
## [289] "Guatemala" "Saudi Arabia" "Spain"
## [292] "Panama" "Brazil" "Uruguay"
## [295] "Singapore" "El Salvador" "Italy"
## [298] "Bahrain" "Slovakia" "Poland"
## [301] "Uzbekistan" "Lithuania" "Colombia"
## [304] "Slovenia" "Nicaragua" "Kosovo"
## [307] "Argentina" "Romania" "Cyprus"
## [310] "Ecuador" "Kuwait" "Thailand"
## [313] "Latvia" "Estonia" "Jamaica"
## [316] "Mauritius" "Japan" "Honduras"
## [319] "Kazakhstan" "Bolivia" "Hungary"
## [322] "Paraguay" "Peru" "Portugal"
## [325] "Pakistan" "Russia" "Philippines"
## [328] "Serbia" "Moldova" "Libya"
## [331] "Montenegro" "Tajikistan" "Croatia"
## [334] "Dominican Republic" "Bosnia and Herzegovina" "Turkey"
## [337] "Malaysia" "Belarus" "Greece"
## [340] "Mongolia" "Nigeria" "Kyrgyzstan"
## [343] "Turkmenistan" "Algeria" "Morocco"
## [346] "Azerbaijan" "Lebanon" "Indonesia"
## [349] "China" "Vietnam" "Cameroon"
## [352] "Bulgaria" "Ghana" "Nepal"
## [355] "Jordan" "Benin" "Gabon"
## [358] "South Africa" "Albania" "Venezuela"
## [361] "Cambodia" "Senegal" "Niger"
## [364] "Burkina Faso" "Armenia" "Iran"
## [367] "Guinea" "Georgia" "Kenya"
## [370] "Mauritania" "Tunisia" "Bangladesh"
## [373] "Iraq" "Mali" "Sierra Leone"
## [376] "Sri Lanka" "Myanmar" "Chad"
## [379] "Ukraine" "Ethiopia" "Uganda"
## [382] "Egypt" "Zambia" "Togo"
## [385] "India" "Liberia" "Madagascar"
## [388] "Burundi" "Zimbabwe" "Haiti"
## [391] "Botswana" "Malawi" "Yemen"
## [394] "Rwanda" "Tanzania" "Afghanistan"
## [397] "Finland" "Norway" "Denmark"
## [400] "Iceland" "Switzerland" "Netherlands"
## [403] "Canada" "New Zealand" "Sweden"
## [406] "Australia" "United Kingdom" "Austria"
## [409] "Costa Rica" "Ireland" "Germany"
## [412] "Belgium" "Luxembourg" "United States"
## [415] "Israel" "United Arab Emirates" "Malta"
## [418] "France" "Mexico" "Chile"
## [421] "Panama" "Brazil" "Argentina"
## [424] "Guatemala" "Uruguay" "Saudi Arabia"
## [427] "Singapore" "Malaysia" "Spain"
## [430] "Colombia" "Slovakia" "El Salvador"
## [433] "Nicaragua" "Poland" "Bahrain"
## [436] "Uzbekistan" "Kuwait" "Thailand"
## [439] "Italy" "Ecuador" "Lithuania"
## [442] "Slovenia" "Romania" "Latvia"
## [445] "Japan" "Mauritius" "Jamaica"
## [448] "Russia" "Kazakhstan" "Cyprus"
## [451] "Bolivia" "Estonia" "Paraguay"
## [454] "Peru" "Kosovo" "Moldova"
## [457] "Turkmenistan" "Hungary" "Libya"
## [460] "Philippines" "Honduras" "Belarus"
## [463] "Turkey" "Pakistan" "Portugal"
## [466] "Serbia" "Lebanon" "Greece"
## [469] "Montenegro" "Croatia" "Dominican Republic"
## [472] "Algeria" "Morocco" "China"
## [475] "Azerbaijan" "Tajikistan" "Jordan"
## [478] "Nigeria" "Kyrgyzstan" "Bosnia and Herzegovina"
## [481] "Mongolia" "Vietnam" "Indonesia"
## [484] "Cameroon" "Bulgaria" "Nepal"
## [487] "Venezuela" "Gabon" "South Africa"
## [490] "Iran" "Ghana" "Senegal"
## [493] "Tunisia" "Albania" "Sierra Leone"
## [496] "Bangladesh" "Sri Lanka" "Iraq"
## [499] "Mali" "Cambodia" "Burkina Faso"
## [502] "Egypt" "Kenya" "Zambia"
## [505] "Mauritania" "Ethiopia" "Georgia"
## [508] "Armenia" "Myanmar" "Chad"
## [511] "India" "Niger" "Uganda"
## [514] "Benin" "Ukraine" "Togo"
## [517] "Guinea" "Madagascar" "Zimbabwe"
## [520] "Afghanistan" "Botswana" "Malawi"
## [523] "Haiti" "Liberia" "Rwanda"
## [526] "Yemen" "Tanzania" "Burundi"
## [529] "Denmark" "Switzerland" "Iceland"
## [532] "Norway" "Finland" "Canada"
## [535] "Netherlands" "New Zealand" "Australia"
## [538] "Sweden" "Israel" "Austria"
## [541] "United States" "Costa Rica" "Germany"
## [544] "Brazil" "Belgium" "Ireland"
## [547] "Luxembourg" "Mexico" "Singapore"
## [550] "United Kingdom" "Chile" "Panama"
## [553] "Argentina" "United Arab Emirates" "Uruguay"
## [556] "Malta" "Colombia" "France"
## [559] "Thailand" "Saudi Arabia" "Spain"
## [562] "Algeria" "Guatemala" "Kuwait"
## [565] "Bahrain" "Venezuela" "Slovakia"
## [568] "El Salvador" "Malaysia" "Nicaragua"
## [571] "Uzbekistan" "Italy" "Ecuador"
## [574] "Japan" "Kazakhstan" "Moldova"
## [577] "Russia" "Poland" "Bolivia"
## [580] "Lithuania" "Belarus" "Slovenia"
## [583] "Peru" "Turkmenistan" "Mauritius"
## [586] "Libya" "Latvia" "Cyprus"
## [589] "Paraguay" "Romania" "Estonia"
## [592] "Jamaica" "Croatia" "Kosovo"
## [595] "Turkey" "Indonesia" "Jordan"
## [598] "Azerbaijan" "Philippines" "China"
## [601] "Kyrgyzstan" "Serbia" "Bosnia and Herzegovina"
## [604] "Montenegro" "Dominican Republic" "Morocco"
## [607] "Hungary" "Pakistan" "Lebanon"
## [610] "Portugal" "Vietnam" "Tunisia"
## [613] "Greece" "Tajikistan" "Mongolia"
## [616] "Nigeria" "Honduras" "Iran"
## [619] "Zambia" "Nepal" "Albania"
## [622] "Bangladesh" "Sierra Leone" "Iraq"
## [625] "Cameroon" "Ethiopia" "South Africa"
## [628] "Sri Lanka" "India" "Myanmar"
## [631] "Egypt" "Armenia" "Kenya"
## [634] "Ukraine" "Ghana" "Georgia"
## [637] "Senegal" "Bulgaria" "Mauritania"
## [640] "Zimbabwe" "Malawi" "Gabon"
## [643] "Mali" "Haiti" "Botswana"
## [646] "Cambodia" "Niger" "Chad"
## [649] "Burkina Faso" "Uganda" "Yemen"
## [652] "Madagascar" "Tanzania" "Liberia"
## [655] "Guinea" "Rwanda" "Benin"
## [658] "Afghanistan" "Togo" "Burundi"
## [661] "Finland" "Denmark" "Switzerland"
## [664] "Iceland" "Norway" "Netherlands"
## [667] "Sweden" "New Zealand" "Austria"
## [670] "Luxembourg" "Canada" "Australia"
## [673] "United Kingdom" "Israel" "Costa Rica"
## [676] "Ireland" "Germany" "United States"
## [679] "Belgium" "United Arab Emirates" "Malta"
## [682] "France" "Mexico" "Uruguay"
## [685] "Saudi Arabia" "Spain" "Guatemala"
## [688] "Italy" "Singapore" "Brazil"
## [691] "Slovenia" "El Salvador" "Kosovo"
## [694] "Panama" "Slovakia" "Uzbekistan"
## [697] "Chile" "Bahrain" "Lithuania"
## [700] "Poland" "Colombia" "Cyprus"
## [703] "Nicaragua" "Romania" "Kuwait"
## [706] "Mauritius" "Kazakhstan" "Estonia"
## [709] "Philippines" "Hungary" "Thailand"
## [712] "Argentina" "Honduras" "Latvia"
## [715] "Ecuador" "Portugal" "Jamaica"
## [718] "Japan" "Peru" "Serbia"
## [721] "Bolivia" "Pakistan" "Paraguay"
## [724] "Dominican Republic" "Bosnia and Herzegovina" "Moldova"
## [727] "Tajikistan" "Montenegro" "Russia"
## [730] "Kyrgyzstan" "Belarus" "Greece"
## [733] "Croatia" "Libya" "Mongolia"
## [736] "Malaysia" "Vietnam" "Indonesia"
## [739] "Benin" "Azerbaijan" "Ghana"
## [742] "Nepal" "Turkey" "China"
## [745] "Turkmenistan" "Bulgaria" "Morocco"
## [748] "Cameroon" "Venezuela" "Algeria"
## [751] "Senegal" "Guinea" "Niger"
## [754] "Albania" "Cambodia" "Bangladesh"
## [757] "Gabon" "South Africa" "Iraq"
## [760] "Lebanon" "Burkina Faso" "Mali"
## [763] "Nigeria" "Armenia" "Georgia"
## [766] "Iran" "Jordan" "Kenya"
## [769] "Ukraine" "Liberia" "Uganda"
## [772] "Chad" "Tunisia" "Mauritania"
## [775] "Sri Lanka" "Myanmar" "Togo"
## [778] "Ethiopia" "Madagascar" "Egypt"
## [781] "Sierra Leone" "Burundi" "Zambia"
## [784] "Haiti" "India" "Malawi"
## [787] "Yemen" "Botswana" "Tanzania"
## [790] "Rwanda" "Zimbabwe" "Afghanistan"
## # A tibble: 132 × 2
## country sum
## <chr> <int>
## 1 Afghanistan 6
## 2 Albania 6
## 3 Algeria 6
## 4 Argentina 6
## 5 Armenia 6
## 6 Australia 6
## 7 Austria 6
## 8 Azerbaijan 6
## 9 Bahrain 6
## 10 Bangladesh 6
## # ℹ 122 more rows
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'continent', 'country'. You can override
## using the `.groups` argument.
## # A tibble: 132 × 2
## # Groups: continent, country [132]
## continent country
## <chr> <chr>
## 1 Africa Algeria
## 2 Africa Benin
## 3 Africa Botswana
## 4 Africa Burkina Faso
## 5 Africa Burundi
## 6 Africa Cameroon
## 7 Africa Chad
## 8 Africa Egypt
## 9 Africa Ethiopia
## 10 Africa Gabon
## # ℹ 122 more rows
## # A tibble: 132 × 3
## country mean_happiness_score gdp_per_capita
## <chr> <dbl> <dbl>
## 1 Finland 7.58 1.34
## 2 Denmark 7.56 1.39
## 3 Norway 7.53 1.50
## 4 Switzerland 7.52 1.46
## 5 Iceland 7.51 1.38
## 6 Netherlands 7.41 1.40
## 7 Canada 7.33 1.37
## 8 Sweden 7.32 1.39
## 9 New Zealand 7.31 1.30
## 10 Australia 7.27 1.38
## # ℹ 122 more rows
# One row per country: the average happiness score and the average GDP per
# capita across that country's rows, sorted from highest to lowest happiness.
mean_gdp <- hap_data %>%
  group_by(country) %>%
  reframe(
    mean_happiness_score = mean(happiness_score),
    gdp_per_capita = mean(gdp_per_capita)
  ) %>%
  arrange(desc(mean_happiness_score))

# Show the aggregated table.
mean_gdp
## # A tibble: 132 × 3
## country mean_happiness_score gdp_per_capita
## <chr> <dbl> <dbl>
## 1 Finland 7.58 1.34
## 2 Denmark 7.56 1.39
## 3 Norway 7.53 1.50
## 4 Switzerland 7.52 1.46
## 5 Iceland 7.51 1.38
## 6 Netherlands 7.41 1.40
## 7 Canada 7.33 1.37
## 8 Sweden 7.32 1.39
## 9 New Zealand 7.31 1.30
## 10 Australia 7.27 1.38
## # ℹ 122 more rows
# Simple linear regression: country-mean happiness explained by mean GDP per
# capita.
fit_mean_gdp <- lm(
  formula = mean_happiness_score ~ gdp_per_capita,
  data = mean_gdp
)

# Residual quantiles, coefficient table and goodness-of-fit statistics.
summary(fit_mean_gdp)
##
## Call:
## lm(formula = mean_happiness_score ~ gdp_per_capita, data = mean_gdp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.97429 -0.38927 -0.07531 0.49892 1.42150
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.2722 0.1448 22.59 <2e-16 ***
## gdp_per_capita 2.3687 0.1442 16.42 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6307 on 130 degrees of freedom
## Multiple R-squared: 0.6748, Adjusted R-squared: 0.6723
## F-statistic: 269.7 on 1 and 130 DF, p-value: < 2.2e-16
## [1] 3.272233
## [1] 2.368744
# Labelled scatter plot of country-mean happiness against mean GDP per capita
# with a straight reference line drawn from the coefficient table.
ggplot(mean_gdp, aes(x = gdp_per_capita, y = mean_happiness_score)) +
  # x and y are inherited from ggplot(); only the label is layer-specific.
  geom_text_repel(
    aes(label = country),
    color = "#ee82ee", size = 2, max.overlaps = 17
  ) +
  geom_point(color = "#ee82ee") +
  # mean_gdp_coef is defined outside this block; presumably it is
  # coef(summary(fit_mean_gdp)), so [1, 1] is the intercept estimate and
  # [2, 1] the slope estimate -- confirm against its definition.
  geom_abline(
    intercept = mean_gdp_coef[1, 1],
    slope = mean_gdp_coef[2, 1],
    color = "white"
  ) +
  labs(
    title = "Mean Happiness Score vs Mean GDP Per Capita",
    x = "Mean GDP Per Capita (Hundreds of Thousands of Dollars)",
    y = "Mean Happiness Score 2015-2020"
  ) +
  scale_x_continuous(
    breaks = seq(0, 2, by = 0.4),       # major x-axis ticks
    minor_breaks = seq(0, 2, by = 0.1), # minor x-axis gridlines
    limits = c(0, 2),                   # points outside this range are dropped
    expand = c(0, 0)                    # no padding around the x-axis range
  ) +
  scale_y_continuous(
    breaks = seq(2, 8, by = 1),         # major y-axis ticks
    minor_breaks = seq(2, 8, by = 0.5), # minor y-axis gridlines
    limits = c(2, 8),                   # points outside this range are dropped
    expand = c(0, 0)                    # no padding around the y-axis range
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    panel.grid.major = element_line(color = "beige", linewidth = 0.5),
    panel.grid.minor = element_line(color = "beige", linewidth = 0.25)
  )
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# One row per country: the average happiness score and the average health
# value across that country's rows, sorted from highest to lowest happiness.
mean_health <- hap_data %>%
  group_by(country) %>%
  reframe(
    mean_happiness_score = mean(happiness_score),
    health = mean(health)
  ) %>%
  arrange(desc(mean_happiness_score))

# Show the aggregated table.
mean_health
## # A tibble: 132 × 3
## country mean_happiness_score health
## <chr> <dbl> <dbl>
## 1 Finland 7.58 0.888
## 2 Denmark 7.56 0.884
## 3 Norway 7.53 0.896
## 4 Switzerland 7.52 0.947
## 5 Iceland 7.51 0.932
## 6 Netherlands 7.41 0.895
## 7 Canada 7.33 0.921
## 8 Sweden 7.32 0.914
## 9 New Zealand 7.31 0.911
## 10 Australia 7.27 0.933
## # ℹ 122 more rows
# Simple linear regression: country-mean happiness explained by mean health.
fit_mean_health <- lm(
  formula = mean_happiness_score ~ health,
  data = mean_health
)

# Residual quantiles, coefficient table and goodness-of-fit statistics.
summary(fit_mean_health)
##
## Call:
## lm(formula = mean_happiness_score ~ health, data = mean_health)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.32965 -0.44722 0.00345 0.48446 1.70080
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.8723 0.1780 16.14 <2e-16 ***
## health 4.0097 0.2598 15.43 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6573 on 130 degrees of freedom
## Multiple R-squared: 0.6468, Adjusted R-squared: 0.6441
## F-statistic: 238.1 on 1 and 130 DF, p-value: < 2.2e-16
## [1] 2.872269
## [1] 4.009675
# Labelled scatter plot of country-mean happiness against mean health with a
# straight reference line drawn from the coefficient table.
ggplot(mean_health, aes(x = health, y = mean_happiness_score)) +
  # x and y are inherited from ggplot(); only the label is layer-specific.
  geom_text_repel(
    aes(label = country),
    color = "#ee82ee", size = 2, max.overlaps = 17
  ) +
  geom_point(color = "#ee82ee") +
  # mean_health_coef is defined outside this block; presumably it is
  # coef(summary(fit_mean_health)), so [1, 1] is the intercept estimate and
  # [2, 1] the slope estimate -- confirm against its definition.
  geom_abline(
    intercept = mean_health_coef[1, 1],
    slope = mean_health_coef[2, 1],
    color = "white"
  ) +
  labs(
    title = "Mean Happiness Score vs Mean Health",
    x = "Mean Health Coefficient",
    y = "Mean Happiness Score 2015-2020"
  ) +
  # NOTE(review): the rendered output below reports one row removed as
  # missing; presumably one country's mean health falls outside c(0, 1).
  # Confirm and widen the limits if the point should be shown.
  scale_x_continuous(
    breaks = seq(0, 1, by = 0.2),       # major x-axis ticks
    minor_breaks = seq(0, 1, by = 0.2), # minor x-axis gridlines
    limits = c(0, 1),                   # points outside this range are dropped
    expand = c(0, 0)                    # no padding around the x-axis range
  ) +
  scale_y_continuous(
    breaks = seq(2, 8, by = 1),         # major y-axis ticks
    minor_breaks = seq(2, 8, by = 0.5), # minor y-axis gridlines
    limits = c(2, 8),                   # points outside this range are dropped
    expand = c(0, 0)                    # no padding around the y-axis range
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    panel.grid.major = element_line(color = "beige", linewidth = 0.5),
    panel.grid.minor = element_line(color = "beige", linewidth = 0.25)
  )
## Warning: Removed 1 rows containing missing values (`geom_text_repel()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# One row per country: the average happiness score and the average freedom
# value across that country's rows, sorted from highest to lowest happiness.
mean_freedom <- hap_data %>%
  group_by(country) %>%
  reframe(
    mean_happiness_score = mean(happiness_score),
    freedom = mean(freedom)
  ) %>%
  arrange(desc(mean_happiness_score))

# Show the aggregated table.
mean_freedom
## # A tibble: 132 × 3
## country mean_happiness_score freedom
## <chr> <dbl> <dbl>
## 1 Finland 7.58 0.628
## 2 Denmark 7.56 0.632
## 3 Norway 7.53 0.643
## 4 Switzerland 7.52 0.622
## 5 Iceland 7.51 0.625
## 6 Netherlands 7.41 0.594
## 7 Canada 7.33 0.616
## 8 Sweden 7.32 0.623
## 9 New Zealand 7.31 0.623
## 10 Australia 7.27 0.608
## # ℹ 122 more rows
# Simple linear regression: country-mean happiness explained by mean freedom.
fit_mean_freedom <- lm(
  formula = mean_happiness_score ~ freedom,
  data = mean_freedom
)

# Residual quantiles, coefficient table and goodness-of-fit statistics.
summary(fit_mean_freedom)
##
## Call:
## lm(formula = mean_happiness_score ~ freedom, data = mean_freedom)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.79808 -0.62992 0.07073 0.77500 1.70874
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.4714 0.2558 13.571 < 2e-16 ***
## freedom 4.6881 0.5704 8.218 1.82e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8972 on 130 degrees of freedom
## Multiple R-squared: 0.3419, Adjusted R-squared: 0.3368
## F-statistic: 67.54 on 1 and 130 DF, p-value: 1.818e-13
## [1] 3.471414
## [1] 4.688101
# Labelled scatter plot of country-mean happiness against mean freedom with a
# straight reference line drawn from the coefficient table.
ggplot(mean_freedom, aes(x = freedom, y = mean_happiness_score)) +
  # x and y are inherited from ggplot(); only the label is layer-specific.
  geom_text_repel(
    aes(label = country),
    color = "#ee82ee", size = 2, max.overlaps = 17
  ) +
  geom_point(color = "#ee82ee") +
  # mean_freedom_coef is defined outside this block; presumably it is
  # coef(summary(fit_mean_freedom)), so [1, 1] is the intercept estimate and
  # [2, 1] the slope estimate -- confirm against its definition.
  geom_abline(
    intercept = mean_freedom_coef[1, 1],
    slope = mean_freedom_coef[2, 1],
    color = "white"
  ) +
  labs(
    title = "Mean Happiness Score vs Mean Freedom",
    x = "Mean Freedom Coefficient",
    y = "Mean Happiness Score 2015-2020"
  ) +
  scale_x_continuous(
    breaks = seq(0, 1, by = 0.2),       # major x-axis ticks
    minor_breaks = seq(0, 1, by = 0.2), # minor x-axis gridlines
    limits = c(0, 1),                   # points outside this range are dropped
    expand = c(0, 0)                    # no padding around the x-axis range
  ) +
  scale_y_continuous(
    breaks = seq(2, 8, by = 1),         # major y-axis ticks
    minor_breaks = seq(2, 8, by = 0.5), # minor y-axis gridlines
    limits = c(2, 8),                   # points outside this range are dropped
    expand = c(0, 0)                    # no padding around the y-axis range
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    panel.grid.major = element_line(color = "beige", linewidth = 0.5),
    panel.grid.minor = element_line(color = "beige", linewidth = 0.25)
  )

# One row per country: the average happiness score and the average generosity
# value across that country's rows, sorted from highest to lowest happiness.
# The assignment starts on its own line so that it is not swallowed by a
# trailing comment of the plot above.
mean_generosity <- hap_data %>%
  group_by(country) %>%
  reframe(
    mean_happiness_score = mean(happiness_score),
    generosity = mean(generosity)
  ) %>%
  arrange(desc(mean_happiness_score))

# Show the aggregated table.
mean_generosity
## # A tibble: 132 × 3
## country mean_happiness_score generosity
## <chr> <dbl> <dbl>
## 1 Finland 7.58 0.208
## 2 Denmark 7.56 0.306
## 3 Norway 7.53 0.322
## 4 Switzerland 7.52 0.276
## 5 Iceland 7.51 0.410
## 6 Netherlands 7.41 0.402
## 7 Canada 7.33 0.372
## 8 Sweden 7.32 0.326
## 9 New Zealand 7.31 0.415
## 10 Australia 7.27 0.401
## # ℹ 122 more rows
# Simple linear regression: country-mean happiness explained by mean
# generosity.
fit_mean_generosity <- lm(
  formula = mean_happiness_score ~ generosity,
  data = mean_generosity
)

# Residual quantiles, coefficient table and goodness-of-fit statistics.
summary(fit_mean_generosity)
##
## Call:
## lm(formula = mean_happiness_score ~ generosity, data = mean_generosity)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2334 -0.8421 0.1184 0.8248 2.1170
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.1108 0.2009 25.444 <2e-16 ***
## generosity 1.7066 0.8337 2.047 0.0427 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.089 on 130 degrees of freedom
## Multiple R-squared: 0.03123, Adjusted R-squared: 0.02378
## F-statistic: 4.19 on 1 and 130 DF, p-value: 0.04267
## [1] 5.11075
## [1] 1.706647
# Labelled scatter plot of country-mean happiness against mean generosity with
# a straight reference line drawn from the coefficient table.
ggplot(mean_generosity, aes(x = generosity, y = mean_happiness_score)) +
  # x and y are inherited from ggplot(); only the label is layer-specific.
  geom_text_repel(
    aes(label = country),
    color = "#ee82ee", size = 2, max.overlaps = 17
  ) +
  geom_point(color = "#ee82ee") +
  # mean_generosity_coef is defined outside this block; presumably it is
  # coef(summary(fit_mean_generosity)), so [1, 1] is the intercept estimate
  # and [2, 1] the slope estimate -- confirm against its definition.
  geom_abline(
    intercept = mean_generosity_coef[1, 1],
    slope = mean_generosity_coef[2, 1],
    color = "white"
  ) +
  labs(
    title = "Mean Happiness Score vs Mean Generosity",
    x = "Mean Generosity Coefficient",
    y = "Mean Happiness Score 2015-2020"
  ) +
  scale_x_continuous(
    breaks = seq(0, 1, by = 0.2),       # major x-axis ticks
    minor_breaks = seq(0, 1, by = 0.2), # minor x-axis gridlines
    limits = c(0, 1),                   # points outside this range are dropped
    expand = c(0, 0)                    # no padding around the x-axis range
  ) +
  scale_y_continuous(
    breaks = seq(2, 8, by = 1),         # major y-axis ticks
    minor_breaks = seq(2, 8, by = 0.5), # minor y-axis gridlines
    limits = c(2, 8),                   # points outside this range are dropped
    expand = c(0, 0)                    # no padding around the y-axis range
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    panel.grid.major = element_line(color = "beige", linewidth = 0.5),
    panel.grid.minor = element_line(color = "beige", linewidth = 0.25)
  )

# One row per country: the average happiness score and the average government
# trust value across that country's rows, sorted from highest to lowest
# happiness. The assignment starts on its own line so that it is not swallowed
# by a trailing comment of the plot above.
mean_gov_trust <- hap_data %>%
  group_by(country) %>%
  reframe(
    mean_happiness_score = mean(happiness_score),
    gov_trust = mean(gov_trust)
  ) %>%
  arrange(desc(mean_happiness_score))

# Show the aggregated table.
mean_gov_trust
## # A tibble: 132 × 3
## country mean_happiness_score gov_trust
## <chr> <dbl> <dbl>
## 1 Finland 7.58 0.412
## 2 Denmark 7.56 0.440
## 3 Norway 7.53 0.359
## 4 Switzerland 7.52 0.384
## 5 Iceland 7.51 0.141
## 6 Netherlands 7.41 0.310
## 7 Canada 7.33 0.313
## 8 Sweden 7.32 0.405
## 9 New Zealand 7.31 0.410
## 10 Australia 7.27 0.318
## # ℹ 122 more rows
# Simple linear regression: country-mean happiness explained by mean
# government trust.
fit_mean_gov_trust <- lm(
  formula = mean_happiness_score ~ gov_trust,
  data = mean_gov_trust
)

# Residual quantiles, coefficient table and goodness-of-fit statistics.
summary(fit_mean_gov_trust)
##
## Call:
## lm(formula = mean_happiness_score ~ gov_trust, data = mean_gov_trust)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8063 -0.7154 0.1606 0.7288 1.9607
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.8441 0.1321 36.672 < 2e-16 ***
## gov_trust 5.0049 0.8071 6.201 6.92e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9716 on 130 degrees of freedom
## Multiple R-squared: 0.2283, Adjusted R-squared: 0.2223
## F-statistic: 38.45 on 1 and 130 DF, p-value: 6.922e-09
## [1] 4.844098
## [1] 5.004884
# Labelled scatter plot of country-mean happiness against mean government
# trust with a straight reference line drawn from the coefficient table.
ggplot(mean_gov_trust, aes(x = gov_trust, y = mean_happiness_score)) +
  # x and y are inherited from ggplot(); only the label is layer-specific.
  geom_text_repel(
    aes(label = country),
    color = "#ee82ee", size = 2, max.overlaps = 17
  ) +
  geom_point(color = "#ee82ee") +
  # mean_gov_trust_coef is defined outside this block; presumably it is
  # coef(summary(fit_mean_gov_trust)), so [1, 1] is the intercept estimate
  # and [2, 1] the slope estimate -- confirm against its definition.
  geom_abline(
    intercept = mean_gov_trust_coef[1, 1],
    slope = mean_gov_trust_coef[2, 1],
    color = "white"
  ) +
  labs(
    title = "Mean Happiness Score vs Mean Government Trust",
    x = "Mean Government Trust Coefficient",
    y = "Mean Happiness Score 2015-2020"
  ) +
  scale_x_continuous(
    breaks = seq(0, 1, by = 0.2),       # major x-axis ticks
    minor_breaks = seq(0, 1, by = 0.2), # minor x-axis gridlines
    limits = c(0, 1),                   # points outside this range are dropped
    expand = c(0, 0)                    # no padding around the x-axis range
  ) +
  scale_y_continuous(
    breaks = seq(2, 8, by = 1),         # major y-axis ticks
    minor_breaks = seq(2, 8, by = 0.5), # minor y-axis gridlines
    limits = c(2, 8),                   # points outside this range are dropped
    expand = c(0, 0)                    # no padding around the y-axis range
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    panel.grid.major = element_line(color = "beige", linewidth = 0.5),
    panel.grid.minor = element_line(color = "beige", linewidth = 0.25)
  )
## Warning: ggrepel: 27 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# One row per country: the average happiness score and the average social
# support value across that country's rows, sorted from highest to lowest
# happiness.
mean_social_support <- hap_data %>%
  group_by(country) %>%
  reframe(
    mean_happiness_score = mean(happiness_score),
    social_support = mean(social_support)
  ) %>%
  arrange(desc(mean_happiness_score))

# Show the aggregated table.
mean_social_support
## # A tibble: 132 × 3
## country mean_happiness_score social_support
## <chr> <dbl> <dbl>
## 1 Finland 7.58 0.780
## 2 Denmark 7.56 0.778
## 3 Norway 7.53 0.777
## 4 Switzerland 7.52 0.758
## 5 Iceland 7.51 0.803
## 6 Netherlands 7.41 0.746
## 7 Canada 7.33 0.745
## 8 Sweden 7.32 0.737
## 9 New Zealand 7.31 0.774
## 10 Australia 7.27 0.766
## # ℹ 122 more rows
# Simple linear regression: country-mean happiness explained by mean social
# support.
fit_mean_social_support <- lm(
  formula = mean_happiness_score ~ social_support,
  data = mean_social_support
)

# Residual quantiles, coefficient table and goodness-of-fit statistics.
summary(fit_mean_social_support)
##
## Call:
## lm(formula = mean_happiness_score ~ social_support, data = mean_social_support)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.65282 -0.47291 -0.00254 0.43672 1.61483
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.6163 0.2691 6.006 1.78e-08 ***
## social_support 6.3302 0.4309 14.690 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6781 on 130 degrees of freedom
## Multiple R-squared: 0.6241, Adjusted R-squared: 0.6212
## F-statistic: 215.8 on 1 and 130 DF, p-value: < 2.2e-16
# Coefficient matrix of the fitted model: one row per term, with the estimate
# in the first column.
mean_social_support_coef <- summary(fit_mean_social_support)$coefficients

# Estimated intercept (row 1, column 1 of the matrix).
mean_social_support_coef["(Intercept)", "Estimate"]
## [1] 1.616323
## [1] 6.330171
# Labelled scatter plot of country-mean happiness against mean social support
# with the fitted regression line overlaid.
ggplot(
  mean_social_support,
  aes(x = social_support, y = mean_happiness_score)
) +
  # x and y are inherited from ggplot(); only the label is layer-specific.
  geom_text_repel(
    aes(label = country),
    color = "#ee82ee", size = 2, max.overlaps = 17
  ) +
  geom_point(color = "#ee82ee") +
  # Column 1 of the coefficient matrix holds the estimates: row 1 is the
  # intercept and row 2 the slope.
  geom_abline(
    intercept = mean_social_support_coef[1, 1],
    slope = mean_social_support_coef[2, 1],
    color = "white"
  ) +
  labs(
    title = "Mean Happiness Score vs Mean Support",
    x = "Mean Social Support Coefficient",
    y = "Mean Happiness Score 2015-2020"
  ) +
  scale_x_continuous(
    breaks = seq(0, 1, by = 0.2),       # major x-axis ticks
    minor_breaks = seq(0, 1, by = 0.2), # minor x-axis gridlines
    limits = c(0, 1),                   # points outside this range are dropped
    expand = c(0, 0)                    # no padding around the x-axis range
  ) +
  scale_y_continuous(
    breaks = seq(2, 8, by = 1),         # major y-axis ticks
    minor_breaks = seq(2, 8, by = 0.5), # minor y-axis gridlines
    limits = c(2, 8),                   # points outside this range are dropped
    expand = c(0, 0)                    # no padding around the y-axis range
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    panel.grid.major = element_line(color = "beige", linewidth = 0.5),
    panel.grid.minor = element_line(color = "beige", linewidth = 0.25)
  )
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# One row per country: the mean of happiness_score and the mean of cpi_score
# over all of that country's rows in hap_data, ordered from the highest mean
# happiness score to the lowest.
mean_cpi_score <- hap_data %>%
  group_by(country) %>%
  summarise(
    mean_happiness_score = mean(happiness_score),
    cpi_score = mean(cpi_score),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_happiness_score))
# Print the summary table.
mean_cpi_score
## # A tibble: 132 × 3
## country mean_happiness_score cpi_score
## <chr> <dbl> <dbl>
## 1 Finland 7.58 86.7
## 2 Denmark 7.56 88.7
## 3 Norway 7.53 85
## 4 Switzerland 7.52 85.3
## 5 Iceland 7.51 77.2
## 6 Netherlands 7.41 82.5
## 7 Canada 7.33 80.3
## 8 Sweden 7.32 86
## 9 New Zealand 7.31 88.7
## 10 Australia 7.27 77.7
## # ℹ 122 more rows
# Simple linear regression of mean_happiness_score on cpi_score, using the
# per-country rows of mean_cpi_score.
fit_mean_cpi_score <- lm(
  formula = mean_happiness_score ~ cpi_score,
  data = mean_cpi_score
)
# Print residual summary, coefficient table and goodness-of-fit statistics.
summary(fit_mean_cpi_score)
##
## Call:
## lm(formula = mean_happiness_score ~ cpi_score, data = mean_cpi_score)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.45955 -0.59362 0.04873 0.43105 1.80012
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.684555 0.168154 21.91 <2e-16 ***
## cpi_score 0.040354 0.003475 11.61 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7749 on 130 degrees of freedom
## Multiple R-squared: 0.5092, Adjusted R-squared: 0.5054
## F-statistic: 134.9 on 1 and 130 DF, p-value: < 2.2e-16
# Coefficient matrix of the CPI regression. geom_abline() further down reads
# mean_cpi_score_coef, but no visible statement assigned it (only the two
# printed values survived), so the assignment and both extractions are
# restored here.
mean_cpi_score_coef <- coef(summary(fit_mean_cpi_score))
mean_cpi_score_coef[1, 1] # Intercept
## [1] 3.684555
mean_cpi_score_coef[2, 1] # Slope
## [1] 0.0403536
# Scatter plot of mean happiness score against mean CPI score, one point per
# row of mean_cpi_score, with repelled country labels and the fitted
# regression line.
ggplot(mean_cpi_score, aes(x = cpi_score, y = mean_happiness_score)) +
  # Layers inherit x and y from ggplot(); only the label mapping is
  # layer-specific.
  geom_text_repel(
    aes(label = country),
    color = "#ee82ee", size = 2, max.overlaps = 17
  ) +
  geom_point(color = "#ee82ee") +
  geom_abline(
    intercept = mean_cpi_score_coef[1, 1],
    slope = mean_cpi_score_coef[2, 1],
    color = "white"
  ) +
  labs(
    x = "Mean CPI Score Coefficient",
    y = "Mean Happiness Score 2015-2020"
  ) +
  ggtitle("Mean Happiness Score vs Mean CPI Score") +
  scale_x_continuous(
    breaks = seq(0, 100, by = 20),
    minor_breaks = seq(0, 100, by = 20),
    limits = c(0, 100),
    expand = c(0, 0) # no padding around the x-axis
  ) +
  scale_y_continuous(
    breaks = seq(2, 8, by = 1),
    minor_breaks = seq(2, 8, by = 0.5),
    limits = c(2, 8),
    expand = c(0, 0) # no padding around the y-axis
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4),
    # Line widths use `linewidth`; `size` is deprecated for line elements
    # since ggplot2 3.4.0.
    panel.grid.major = element_line(color = "beige", linewidth = 0.5),
    panel.grid.minor = element_line(color = "beige", linewidth = 0.25)
  )
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Multiple linear regression of happiness_score on six indicator columns,
# fitted on the rows of hap_data.
final.fit.1 <- lm(
  formula = happiness_score ~ gdp_per_capita + health + freedom +
    generosity + gov_trust + social_support,
  data = hap_data
)
# Print residual summary, coefficient table and goodness-of-fit statistics.
summary(final.fit.1)
##
## Call:
## lm(formula = happiness_score ~ gdp_per_capita + health + freedom +
## generosity + gov_trust + social_support, data = hap_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.88535 -0.36630 0.04372 0.35278 1.39314
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.33227 0.07712 30.241 < 2e-16 ***
## gdp_per_capita 1.42153 0.08664 16.408 < 2e-16 ***
## health 1.30082 0.15062 8.636 < 2e-16 ***
## freedom 1.58766 0.16875 9.408 < 2e-16 ***
## generosity 0.85785 0.18438 4.653 3.84e-06 ***
## gov_trust 0.73820 0.21887 3.373 0.00078 ***
## social_support 0.03823 0.03620 1.056 0.29127
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5598 on 785 degrees of freedom
## Multiple R-squared: 0.7541, Adjusted R-squared: 0.7522
## F-statistic: 401.3 on 6 and 785 DF, p-value: < 2.2e-16
# Interactive scatter plot of happiness_score against the sum of the six
# indicator columns, coloured by continent, with a single linear fit across
# all points. `frame = year` is not a ggplot2 aesthetic; it is kept in the
# mapping for ggplotly() to pick up.
moving_plot_gdp <- ggplot(
  data = hap_data,
  aes(
    x = gdp_per_capita + health + freedom + generosity + gov_trust +
      social_support,
    y = happiness_score,
    frame = year,
    # Refer to the column by name inside aes() instead of hap_data$continent,
    # so the mapping stays tied to the `data` argument.
    color = continent
  )
) +
  geom_point() +
  # group = 0 puts every point in one group, so one regression line is drawn
  # instead of one per continent.
  stat_smooth(method = "lm", se = TRUE, color = "white", aes(group = 0)) +
  labs(
    title = "Happiness Score VS WHR Indicators",
    x = "Happiness Indicators",
    y = "Happiness ",
    # The legend belongs to the colour aesthetic; `fill` is not mapped, so
    # labelling it never set the legend title.
    color = "Continent"
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4)
  )
ggplotly(moving_plot_gdp)
## `geom_smooth()` using formula = 'y ~ x'
## [1] "timestamp" "email" "program" "in_usa"
## [5] "gdp_per_capita" "health" "freedom" "generosity"
## [9] "gov_trust" "social_support"
## [1] "gdp_per_capita" "health" "freedom" "generosity"
## [5] "gov_trust" "social_support"
## 1 2 3 4 5 6 7 8
## 4.2712389 5.4004642 4.3142264 5.0789877 0.8276564 3.4502691 3.6792132 4.8106825
## 9 10 11 12 13 14 15 16
## 4.3092866 3.6661205 5.1273201 4.3932857 4.6490500 4.7945651 4.7074604 3.3495892
## 17 18 19 20 21 22 23 24
## 5.3385238 2.9089960 4.6344644 3.6985214 4.7494914 4.6104437 5.7216430 4.2838838
## 25 26 27 28 29 30 31 32
## 3.1050383 4.1434699 5.3555405 3.9569297 4.4425588 4.4726997 3.8293803 2.9839383
## 33 34 35 36 37 38 39 40
## 5.2169286 4.6862640 3.7781641 4.7221960 4.5802769 3.2452608 4.0727016 5.3641337
## 41 42 43 44 45 46 47 48
## 4.3212359 2.9268739 4.3942319 3.2882601 4.5400056 5.3311314 4.9397015 2.9826032
## 49 50 51 52 53 54 55 56
## 5.2010340 4.0858011 2.9971318 4.7035151 3.1409546 4.8337188 4.2561842 4.0598598
## 57 58 59 60 61 62 63 64
## 2.7466891 4.6270038 4.8122404 4.0731540 3.4658190 3.9614710 3.7067258 4.0039275
## 65 66 67 68 69 70 71 72
## 3.0593500 4.6603407 3.9563126 3.5646947 4.8853171 3.9930690 3.7064659 3.6494922
## 73 74 75 76 77 78 79 80
## 3.7341241 3.4018144 3.8259488 4.2883937 3.8434070 3.1855746 2.6564948 6.1775197
## 81 82 83 84 85 86 87 88
## 3.8858008 2.3343596 4.7007702 3.2170191 3.5075306 2.4602394 4.0740464 4.2598788
## 89 90 91 92 93 94 95 96
## 4.1031132 3.5979791 4.7520493 4.7635529 3.1065962 4.0316651 4.5708387 4.7820793
## 97 98 99 100 101 102 103 104
## 3.6809625 3.8887941 3.1393162 4.9314691 5.5620374 2.4784469 4.3438466 2.9046932
## 105 106 107 108 109 110 111 112
## 3.7659079 4.5944271 4.7756174 4.6638819 3.5598625 3.8869328 3.1336316 3.7552932
## 113 114 115 116 117 118 119 120
## 3.6949890 5.8637961 3.8421158 3.8991318 5.4319557 2.8382497 3.9025296 4.5586608
## 121 122 123 124 125 126 127
## 3.3825480 4.3576829 3.9874907 4.3371407 3.6136455 4.5301997 4.5539540
## [1] 4.08445
## [1] 6.17752
## [1] 0.8276564
# Predicted happiness for the wgyp rows whose program is
# "Data Science Academy", using the multiple regression final.fit.1.
wgyp_dsa <- wgyp %>%
  filter(program == "Data Science Academy")
# NOTE(review): the division by 10 presumably rescales the predictions to the
# happiness-score range; confirm against how the wgyp inputs were scaled.
wgyp_pred_dsa <- predict(final.fit.1, newdata = wgyp_dsa) / 10
mean(wgyp_pred_dsa)
## [1] 4.284861
# Restored statements: the transcript shows the maximum and minimum after the
# mean, but the calls that printed them were missing.
max(wgyp_pred_dsa)
## [1] 5.721643
min(wgyp_pred_dsa)
## [1] 0.8276564
# Predicted happiness for the wgyp rows whose program is
# "Leadership in the Business World", using the multiple regression
# final.fit.1.
wgyp_lbw <- wgyp %>%
  filter(program == "Leadership in the Business World")
# NOTE(review): the division by 10 presumably rescales the predictions to the
# happiness-score range; confirm against how the wgyp inputs were scaled.
wgyp_pred_lbw <- predict(final.fit.1, newdata = wgyp_lbw) / 10
mean(wgyp_pred_lbw)
## [1] 3.961103
# Restored statements: the transcript shows the maximum and minimum after the
# mean, but the calls that printed them were missing.
max(wgyp_pred_lbw)
## [1] 6.17752
min(wgyp_pred_lbw)
## [1] 2.33436
# Predicted happiness for the wgyp rows with in_usa == "Yes", using the
# multiple regression final.fit.1.
wgyp_usa <- wgyp %>%
  filter(in_usa == "Yes")
# NOTE(review): the division by 10 presumably rescales the predictions to the
# happiness-score range; confirm against how the wgyp inputs were scaled.
wgyp_pred_usa <- predict(final.fit.1, newdata = wgyp_usa) / 10
mean(wgyp_pred_usa)
## [1] 4.056982
# Restored statements: the transcript shows the maximum and minimum after the
# mean, but the calls that printed them were missing.
max(wgyp_pred_usa)
## [1] 6.17752
min(wgyp_pred_usa)
## [1] 0.8276564
# Predicted happiness for the wgyp rows with in_usa == "No", using the
# multiple regression final.fit.1.
wgyp_nusa <- wgyp %>%
  filter(in_usa == "No")
# NOTE(review): the division by 10 presumably rescales the predictions to the
# happiness-score range; confirm against how the wgyp inputs were scaled.
wgyp_pred_nusa <- predict(final.fit.1, newdata = wgyp_nusa) / 10
mean(wgyp_pred_nusa)
## [1] 4.138108
# Restored statements: the transcript shows the maximum and minimum after the
# mean, but the calls that printed them were missing.
max(wgyp_pred_nusa)
## [1] 5.863796
min(wgyp_pred_nusa)
## [1] 2.478447
## [1] 77
# Number of missing values in every column of country_data.
na_counts <- colSums(is.na(country_data))
# Keep only the columns that contain at least one NA.
na_cols <- country_data %>%
  select(all_of(names(which(na_counts > 0))))
# NA count per retained column.
na.vals <- colSums(is.na(na_cols))
na.vals
## agricultural_land land_area armed_forces
## 2 1 3
## birth_rate carbon_emissions cpi
## 2 2 3
## cpi_change fertility_rate forested_area
## 3 2 2
## gasoline_price gdp primary_edu_enrollment
## 3 1 3
## tertiary_edu_enrollment infant_mortality life_expectancy
## 3 2 2
## maternal_mortality minimum_wage pocket_health_expenditure
## 2 20 1
## physicians_per_thousand population labor_force
## 2 1 2
## tax_revenue total_tax unemployment_rate
## 9 2 2
## urban_population
## 2
# Flag the rows of country_data that contain at least one missing value.
na_row_tf <- rowSums(is.na(country_data)) > 0
# Rows with missing data, kept as a table for inspection.
na_rows <- country_data[which(na_row_tf), ]
# From here on na_row_tf means the opposite: TRUE marks a complete row.
na_row_tf <- !na_row_tf
# Positions of the complete rows.
no_na_rows <- which(na_row_tf)
na_rows
## happiness_score country density agricultural_land land_area
## 1: 7.097 Austria 109 32.4 83871
## 2: 6.173 Bahrain 2239 11.1 765
## 3: 5.684 Bolivia 11 34.8 1098581
## 4: 5.633 Bosnia and Herzegovina 64 43.1 51197
## 5: 4.393 Cambodia 95 30.9 181035
## 6: 4.397 Chad 13 39.7 1284000
## 7: 3.545 Comoros 467 71.5 2235
## 8: 6.130 Cyprus 131 12.2 9251
## 9: 7.586 Denmark 137 62.0 43094
## 10: 5.559 Ecuador 71 22.2 283561
## 11: 4.170 Egypt 103 3.8 1001450
## 12: 4.091 Ethiopia 115 36.3 1104300
## 13: 7.804 Finland 18 7.5 338145
## 14: 5.072 Guinea 53 59.0 245857
## 15: 7.530 Iceland 3 18.7 103000
## 16: 6.405 Italy 206 43.2 301340
## 17: 4.724 Mauritania 5 38.5 1030700
## 18: 5.722 Montenegro 47 19.0 13812
## 19: 4.631 Namibia 3 47.1 824292
## 20: 5.254 North Macedonia 83 NA 25713
## 21: 7.315 Norway 15 2.7 323802
## 22: 6.265 Panama 58 30.4 75420
## 23: 6.587 Singapore 8358 0.9 716
## 24: 5.275 South Africa 49 79.8 1219090
## 25: 4.908 State of Palestine 847 NA NA
## 26: 7.395 Sweden 25 7.4 450295
## 27: 7.240 Switzerland 219 38.4 41277
## 28: 6.014 Uzbekistan 79 62.9 447400
## 29: 5.211 Venezuela 32 24.5 912050
## 30: 3.204 Zimbabwe 38 41.9 390757
## happiness_score country density agricultural_land land_area
## armed_forces birth_rate carbon_emissions cpi cpi_change fertility_rate
## 1: 21000 9.70 61448 118.06 1.5 1.47
## 2: 19000 13.99 31694 117.59 2.1 1.99
## 3: 71000 21.75 21606 148.32 1.8 2.73
## 4: 11000 8.11 21848 104.90 0.6 1.27
## 5: 191000 22.46 9919 127.63 2.5 2.50
## 6: 35000 42.17 1016 117.70 -1.0 5.75
## 7: NA 31.88 202 103.62 -4.3 4.21
## 8: 16000 10.46 6626 102.51 0.3 1.33
## 9: 15000 10.60 31786 110.35 0.8 1.73
## 10: 41000 19.72 41155 124.14 0.3 2.43
## 11: 836000 26.38 238560 288.57 9.2 3.33
## 12: 138000 32.34 14870 143.86 15.8 4.25
## 13: 25000 8.60 45871 112.33 1.0 1.41
## 14: 13000 36.36 2996 262.95 9.5 4.70
## 15: 0 12.00 2065 129.00 3.0 1.71
## 16: 347000 7.30 320411 110.62 0.6 1.29
## 17: 21000 33.69 2739 135.02 2.3 4.56
## 18: 12000 11.73 2017 116.32 2.6 1.75
## 19: 16000 28.64 4228 157.97 3.7 3.40
## 20: NA NA NA NA NA NA
## 21: 23000 10.40 41023 120.27 2.2 1.56
## 22: 26000 18.98 10715 122.07 -0.4 2.46
## 23: 81000 8.80 37535 114.41 0.6 1.14
## 24: 80000 20.51 476644 158.93 4.1 2.41
## 25: NA NA NA NA NA NA
## 26: 30000 11.40 43252 110.51 1.8 1.76
## 27: 21000 10.00 34477 99.55 0.4 1.52
## 28: 68000 23.30 91811 NA NA 2.42
## 29: 343000 17.88 164175 2740.27 254.9 2.27
## 30: 51000 30.68 10983 105.51 0.9 3.62
## armed_forces birth_rate carbon_emissions cpi cpi_change fertility_rate
## forested_area gasoline_price gdp primary_edu_enrollment
## 1: 46.9 1.20 4.463147e+11 103.1
## 2: 0.8 0.43 3.857407e+10 99.4
## 3: 50.3 0.71 4.089532e+10 98.2
## 4: 42.7 1.05 2.004785e+10 NA
## 5: 52.9 0.90 2.708939e+10 107.4
## 6: 3.8 0.78 1.131495e+10 86.8
## 7: 19.7 NA 1.185729e+09 99.5
## 8: 18.7 1.23 2.456465e+10 99.3
## 9: 14.7 1.55 3.480780e+11 101.3
## 10: 50.2 0.61 1.074357e+11 103.3
## 11: 0.1 0.40 3.031751e+11 106.3
## 12: 12.5 0.75 9.610766e+10 101.0
## 13: 73.1 1.45 2.687612e+11 100.2
## 14: 25.8 0.90 1.359028e+10 91.5
## 15: 0.5 1.69 2.418804e+10 100.4
## 16: 31.8 1.61 2.001244e+12 101.9
## 17: 0.2 1.13 7.593752e+09 99.9
## 18: 61.5 1.16 5.494737e+09 100.0
## 19: 8.3 0.76 1.236653e+10 124.2
## 20: NA NA 1.022078e+10 NA
## 21: 33.2 1.78 4.033364e+11 100.3
## 22: 61.9 0.74 6.680080e+10 94.4
## 23: 23.1 1.25 3.720625e+11 100.6
## 24: 7.6 0.92 3.514316e+11 100.9
## 25: NA NA NA NA
## 26: 68.9 1.42 5.308329e+11 126.6
## 27: 31.8 1.45 7.030824e+11 105.2
## 28: 7.5 1.03 5.792129e+10 104.2
## 29: 52.7 0.00 4.823593e+11 97.2
## 30: 35.5 1.34 2.144076e+10 109.9
## forested_area gasoline_price gdp primary_edu_enrollment
## tertiary_edu_enrollment infant_mortality life_expectancy maternal_mortality
## 1: 85.1 2.9 81.6 5
## 2: 50.5 6.1 77.2 14
## 3: NA 21.8 71.2 155
## 4: 23.3 5.0 77.3 10
## 5: 13.7 24.0 69.6 160
## 6: 3.3 71.4 54.0 1140
## 7: 9.0 51.3 64.1 273
## 8: 75.9 1.9 80.8 6
## 9: 80.6 3.6 81.0 4
## 10: 44.9 12.2 76.8 59
## 11: 35.2 18.1 71.8 37
## 12: 8.1 39.1 66.2 401
## 13: 88.2 1.4 81.7 3
## 14: 11.6 64.9 61.2 576
## 15: 71.8 1.5 82.7 4
## 16: 61.9 2.6 82.9 2
## 17: 5.0 51.5 64.7 766
## 18: 56.1 2.3 76.8 6
## 19: 22.9 29.0 63.4 195
## 20: NA NA NA NA
## 21: 82.0 2.1 82.8 2
## 22: 47.8 13.1 78.3 52
## 23: 84.8 2.3 83.1 8
## 24: 22.4 28.5 63.9 119
## 25: NA NA NA NA
## 26: 67.0 2.2 82.5 4
## 27: 59.6 3.7 83.6 5
## 28: 10.1 19.1 71.6 29
## 29: 79.3 21.4 72.1 125
## 30: 10.0 33.9 61.2 458
## tertiary_edu_enrollment infant_mortality life_expectancy maternal_mortality
## minimum_wage pocket_health_expenditure physicians_per_thousand population
## 1: NA 17.9 5.17 8877067
## 2: NA 25.1 0.93 1501635
## 3: 1.36 25.9 1.59 11513100
## 4: 1.04 28.6 2.16 3301000
## 5: NA 59.4 0.17 16486542
## 6: 0.60 56.4 0.04 15946876
## 7: 0.71 74.8 0.27 850886
## 8: NA 43.9 1.95 1198575
## 9: NA 13.7 4.01 5818553
## 10: 2.46 43.7 2.04 17373662
## 11: NA 62.0 0.45 100388073
## 12: NA 37.8 0.08 112078730
## 13: NA 19.9 3.81 5520314
## 14: NA 54.5 0.08 12771246
## 15: NA 17.0 4.08 361313
## 16: NA 22.8 3.98 60297396
## 17: 0.53 48.2 0.19 4525696
## 18: 1.23 31.8 2.76 622137
## 19: NA 8.3 0.42 2494530
## 20: NA 35.6 NA 1836713
## 21: NA 14.3 2.92 5347896
## 22: 1.53 30.5 1.57 4246439
## 23: NA 36.7 2.29 5703569
## 24: NA 7.7 0.91 58558270
## 25: NA NA NA NA
## 26: NA 15.2 3.98 10285453
## 27: NA 28.3 4.30 8574832
## 28: 0.24 42.7 2.37 33580650
## 29: 0.01 45.8 1.92 28515829
## 30: NA 25.8 0.21 14645468
## minimum_wage pocket_health_expenditure physicians_per_thousand population
## labor_force tax_revenue total_tax unemployment_rate urban_population
## 1: 60.7 25.4 51.4 4.67 5194416
## 2: 73.4 4.2 13.8 0.71 1467109
## 3: 71.8 17.0 83.7 3.50 8033035
## 4: 46.4 20.4 23.7 18.42 1605144
## 5: 82.3 17.1 23.1 0.68 3924621
## 6: 70.7 NA 63.5 1.89 3712273
## 7: 43.3 NA 219.6 4.34 248152
## 8: 63.1 24.5 22.4 7.27 800708
## 9: 62.2 32.4 23.8 4.91 5119978
## 10: 68.0 NA 34.4 3.97 11116711
## 11: 46.4 12.5 44.4 10.76 42895824
## 12: 79.6 7.5 37.7 2.08 23788710
## 13: 59.1 20.8 36.6 6.59 4716888
## 14: 61.5 10.8 69.3 4.30 4661505
## 15: 75.0 23.3 31.9 2.84 339110
## 16: 49.6 24.3 59.1 9.89 42651966
## 17: 45.9 NA 67.0 9.55 2466821
## 18: 54.4 NA 22.2 14.88 417765
## 19: 59.5 27.1 20.7 20.27 1273258
## 20: NA NA NA NA NA
## 21: 63.8 23.9 36.2 3.35 4418218
## 22: 66.6 NA 37.2 3.90 2890084
## 23: 70.5 13.1 21.0 4.11 5703569
## 24: 56.0 27.5 29.2 28.18 39149717
## 25: NA NA NA NA NA
## 26: 64.6 27.9 49.1 6.48 9021165
## 27: 68.3 10.1 28.8 4.58 6332428
## 28: 65.1 14.8 31.6 5.92 16935729
## 29: 59.7 NA 73.3 8.80 25162368
## 30: 83.1 20.7 31.6 4.95 4717305
## labor_force tax_revenue total_tax unemployment_rate urban_population
## latitude longitude
## 1: 47.516231 14.550072
## 2: 26.066700 50.557700
## 3: -16.290154 -63.588653
## 4: 43.915886 17.679076
## 5: 12.565679 104.990963
## 6: 15.454166 18.732207
## 7: -11.645500 43.333300
## 8: 35.126413 33.429859
## 9: 56.263920 9.501785
## 10: -1.831239 -78.183406
## 11: 26.820553 30.802498
## 12: 9.145000 40.489673
## 13: 61.924110 25.748151
## 14: 9.945587 -9.696645
## 15: 64.963051 -19.020835
## 16: 41.871940 12.567380
## 17: 21.007890 -10.940835
## 18: 42.708678 19.374390
## 19: -22.957640 18.490410
## 20: 41.608635 21.745275
## 21: 60.472024 8.468946
## 22: 8.537981 -80.782127
## 23: 1.352083 103.819836
## 24: -30.559482 22.937506
## 25: 31.952162 35.233154
## 26: 60.128161 18.643501
## 27: 46.818188 8.227512
## 28: 41.377491 64.585262
## 29: 6.423750 -66.589730
## 30: -19.015438 29.154857
## latitude longitude
## [1] 0
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
# Random forest regression of happiness_score on every other column of
# no_na_country_data, plus a 5-fold cross-validated caret fit on the full data.
set.seed(7)
# Split the data into predictors (X) and target variable (Y)
# NOTE(review): X appears to still contain the `country` identifier column
# (the caret summary reports 29 predictors with one ignored by
# pre-processing); confirm it is meant to be used as a predictor.
X <- no_na_country_data %>% select(-happiness_score)
Y <- no_na_country_data$happiness_score
# Split the data into 70/30 training/testing sets (p = 0.7 is the training
# share)
train_index <- createDataPartition(Y, p = 0.7, list = FALSE)
train_data <- X[train_index, ]
test_data <- X[-train_index, ]
train_target <- Y[train_index]
test_target <- Y[-train_index]
# Create a random forest model on the training rows
rf_model <- randomForest(
  x = train_data, y = train_target,
  ntree = 100, importance = TRUE, proximity = TRUE
)
# Cross Validation
ctrl <- trainControl(method = "cv", number = 5) # 5-fold
# Cross-validate a random forest on all rows with mtry fixed at 2.
# `preProcess` is spelled out instead of relying on partial matching of
# `preProc`.
cv_results <- train(
  x = X, y = Y, method = "rf", trControl = ctrl,
  tuneGrid = expand.grid(mtry = 2),
  preProcess = c("center", "scale")
)
# Print the summary of the random forest model
rf_model
##
## Call:
## randomForest(x = train_data, y = train_target, ntree = 100, importance = TRUE, proximity = TRUE)
## Type of random forest: regression
## Number of trees: 100
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 0.5575185
## % Var explained: 51.16
# Restored statement: the cross-validation summary that follows is in the
# transcript, but the call that printed it was missing.
cv_results
## Random Forest
##
## 103 samples
## 29 predictor
##
## Pre-processing: centered (28), scaled (28), ignore (1)
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 82, 83, 83, 82, 82
## Resampling results:
##
## RMSE Rsquared MAE
## 0.7093225 0.6263857 0.5017048
##
## Tuning parameter 'mtry' was held constant at a value of 2
# Predict the held-out rows with the fitted forest, then plot the predictions
# against the actual target values.
predictions <- predict(object = rf_model, newdata = test_data)
plot(x = test_target, y = predictions)
# This uses only the variables significant to 95%
library(randomForest)
library(caret)
# Random forest regression of happiness_score on a hand-picked subset of
# columns, plus a 5-fold cross-validated caret fit on the full data.
set.seed(7)
significant_columns <- c(
  "armed_forces", "forested_area", "tertiary_edu_enrollment",
  "life_expectancy", "maternal_mortality", "minimum_wage",
  "unemployment_rate", "urban_population", "longitude"
)
# Split the data into predictors (X) and target variable (Y)
X <- select(no_na_country_data, all_of(significant_columns))
Y <- no_na_country_data$happiness_score
# Split the data into 70/30 training/testing sets (p = 0.7 is the training
# share)
train_index <- createDataPartition(Y, p = 0.7, list = FALSE)
train_data <- X[train_index, ]
test_data <- X[-train_index, ]
train_target <- Y[train_index]
test_target <- Y[-train_index]
# Create a random forest model on the training rows
rf_model <- randomForest(
  x = train_data, y = train_target,
  ntree = 100, importance = TRUE, proximity = TRUE
)
# Cross Validation
ctrl <- trainControl(method = "cv", number = 5) # 5-fold
# Cross-validate a random forest on all rows with mtry fixed at 2.
# `preProcess` is spelled out instead of relying on partial matching of
# `preProc`.
cv_results <- train(
  x = X, y = Y, method = "rf", trControl = ctrl,
  tuneGrid = expand.grid(mtry = 2),
  preProcess = c("center", "scale")
)
# Print the summary of the random forest model
rf_model
##
## Call:
## randomForest(x = train_data, y = train_target, ntree = 100, importance = TRUE, proximity = TRUE)
## Type of random forest: regression
## Number of trees: 100
## No. of variables tried at each split: 3
##
## Mean of squared residuals: 0.6707164
## % Var explained: 41.24
# Restored statement: the cross-validation summary that follows is in the
# transcript, but the call that printed it was missing.
cv_results
## Random Forest
##
## 103 samples
## 9 predictor
##
## Pre-processing: centered (9), scaled (9)
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 82, 83, 83, 83, 81
## Resampling results:
##
## RMSE Rsquared MAE
## 0.7488524 0.5818441 0.5404693
##
## Tuning parameter 'mtry' was held constant at a value of 2
# Predict the held-out rows with the fitted forest, then plot the predictions
# against the actual target values.
predictions <- predict(rf_model, newdata = test_data)
plot(test_target, predictions)
# library(tree) was fused onto the end of the plot() call, which made that
# line unparseable; it now sits on its own line.
library(tree)
library(randomForest)
library(caret)
# Fit a single regression tree for happiness_score on the current X and Y.
set.seed(7)
# 70/30 training/testing split (p = 0.7 is the training share)
train_index <- createDataPartition(Y, p = 0.7, list = FALSE)
# Held-out rows and their targets
test_data <- X[-train_index, ]
test_target <- Y[-train_index]
# Training rows; the target gets the name happiness_score so that cbind()
# produces a column the model formula can refer to
train_data <- X[train_index, ]
happiness_score <- Y[train_index]
train <- cbind(happiness_score, train_data)
# Grow the tree using every other column of `train` as a predictor
fit1 <- tree(formula = happiness_score ~ ., data = train)
# Show the split structure of the fitted tree
print(fit1)
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 75 85.6200 5.527
## 2) minimum_wage < 2.2 53 43.7900 5.070
## 4) tertiary_edu_enrollment < 28.3 30 27.1600 4.653
## 8) forested_area < 24.6 10 4.5330 4.084
## 16) forested_area < 8.75 5 0.4065 4.453 *
## 17) forested_area > 8.75 5 2.7670 3.715 *
## 9) forested_area > 24.6 20 17.7700 4.938
## 18) maternal_mortality < 498.5 15 10.3600 5.242
## 36) minimum_wage < 0.465 9 4.5420 4.831 *
## 37) minimum_wage > 0.465 6 2.0170 5.858 *
## 19) maternal_mortality > 498.5 5 1.8600 4.025 *
## 5) tertiary_edu_enrollment > 28.3 23 4.6440 5.613
## 10) unemployment_rate < 8.805 13 0.9247 5.823 *
## 11) unemployment_rate > 8.805 10 2.4030 5.340
## 22) forested_area < 22.4 5 0.4960 5.023 *
## 23) forested_area > 22.4 5 0.9016 5.657 *
## 3) minimum_wage > 2.2 22 4.0830 6.628
## 6) minimum_wage < 7.175 14 1.0690 6.377 *
## 7) minimum_wage > 7.175 8 0.5869 7.067 *
# Evaluate the fitted tree on the held-out rows.
# Make predictions on the testing data
test_predictions <- predict(fit1, newdata = test_data)
# RMSE: square root of the mean squared prediction error
test_rmse <- sqrt(mean((test_target - test_predictions)^2))
# R-squared: 1 - (residual sum of squares / total sum of squares around the
# mean of the held-out targets)
test_r_squared <- 1 - sum((test_target - test_predictions)^2) / sum((test_target - mean(test_target))^2)
cat("Testing RMSE:", test_rmse, "\n")
## Testing RMSE: 0.7848045
# Restored statement: test_r_squared was computed and its output is in the
# transcript, but the call that reported it was missing.
cat("Testing R-squared:", test_r_squared, "\n")
## Testing R-squared: 0.5428855